import pandas as pd
import numpy as np
from copy import copy

#Global variables
RAW_DIR = '../original/'
RAW_NAME = '2020-Post-Election-Audit-Results-Master-Sheet.xlsx'
SAVE_DIR = '../ready/'

#Proceed office by office. Start with congress
cong = pd.read_excel(f'{RAW_DIR}{RAW_NAME}', 'Congress', dtype={'Wd.': str})

#Variables to fill out:
#   state
#   town
#   district
#   date
#   office
#   precinct
#   candidate
#   original_votes
#   audited_votes
#   diff
cong = cong.rename(columns={
    'Unnamed: 0': 'district',
    'CITY/TOWN': 'town',
    'Pct.': 'precinct',
    'CANDIDATES': 'candidate',
    'ELECTION NIGHT': 'original_votes',
    'AUDIT': 'audited_votes',
    'DIFFERENCE': 'difference'
    })

cong = cong.fillna(method='ffill')

cong.precinct = cong['Wd.'].astype(str) + \
                '-' + \
                cong.precinct.astype(str)
cong = cong.drop(columns=['Wd.'], axis=1)

cong.candidate = cong.candidate.astype(str).str.upper()
cong.town = cong.town.astype(str).str.upper()

cong['date'] = '2020-11-03'
cong['office'] = 'US HOUSE'
cong['state'] = 'MASSACHUSETTS'

cong.to_csv(f'{SAVE_DIR}Congress_MA.csv', index=False)


#President and U.S. Senate
prsen = pd.read_excel(f'{RAW_DIR}{RAW_NAME}',
                      'STATEWIDE OFFICES')

prsen = prsen.rename(columns={
    'Unnamed: 0': 'town',
    'TOTAL BALLOTS CAST': 'TOTAL BALLOTS CAST Original',
    'TOTAL BALLOTS CAST.1': 'TOTAL BALLOTS CAST Audited',
    'BIDEN AND HARRIS Original ': 'BIDEN AND HARRIS Original'
    })

prsen['precinct'] = prsen['Unnamed: 1'].astype(str) + '-' + \
                    prsen['Unnamed: 2'].astype(str)

#Break into president and senate
pr = copy(prsen[['town',
                 'precinct',
                 'TOTAL BALLOTS CAST Original',
                 'BIDEN AND HARRIS Original',
                 'HAWKINS AND WALKER Original',
                 'JORGENSEN AND COHEN Original',
                 'TRUMP AND PENCE Original',
                 'ALL OTHERS Original',
                 'BLANKS Original',
                 'OVERVOTES Original',
                 'TOTAL BALLOTS CAST Audited',
                 'BIDEN AND HARRIS Audited',
                 'HAWKINS AND WALKER Audited',
                 'JORGENSEN AND COHEN Audited',
                 'TRUMP AND PENCE Audited',
                 'ALL OTHERS Audited',
                 'BLANKS Audited',
                 'OVERVOTES Audited'
                 ]])

#Split and melt by candidate
cands = ['TOTAL BALLOTS CAST',
         'BIDEN AND HARRIS',
         'HAWKINS AND WALKER',
         'JORGENSEN AND COHEN',
         'TRUMP AND PENCE',
         'ALL OTHERS',
         'BLANKS',
         'OVERVOTES'
         ]
pres = pd.DataFrame()
for cand in cands:
    origVar = cand + ' Original'
    audVar = cand + ' Audited'
    currdf = copy(prsen[['town', 'precinct', origVar, audVar]])
    currdf['candidate'] = cand
    currdf = currdf.rename(columns={origVar: 'original_votes',
                                    audVar: 'audited_votes'
                          })
    pres = pd.concat([pres, currdf])

pres['state'] = 'MASSACHUSETTS'
pres['town'] = pres['town'].astype(str).str.upper()
pres['date'] = '2020-11-03'
pres['office'] = 'US PRESIDENT'
pres['diff'] = pres.audited_votes - pres.original_votes

cand_reps = {
        'BIDEN AND HARRIS': 'JOSEPH R BIDEN',
        'HAWKINS AND WALKER': 'HOWARD G HAWKINS',
        'JORGENSEN AND COHEN': 'JO JORGENSEN',
        'TRUMP AND PENCE': 'DONALD J TRUMP',
        'ALL OTHERS': 'OTHERS'
}
for cand in cand_reps.keys():
    pres.loc[pres.candidate == cand, 'candidate'] = cand_reps[cand]

pres['party'] = ''
party_map = {
        'JOSEPH R BIDEN': 'DEMOCRAT',
        'HOWARD G HAWKINS': 'GREEN',
        'JO JORGENSEN': 'LIBERTARIAN',
        'DONALD J TRUMP': 'REPUBLICAN'
}
for cand in party_map.keys():
    pres.loc[pres.candidate == cand, 'party'] = party_map[cand]

pres = pres[['state',
             'town',
             'date',
             'office',
             'precinct',
             'candidate',
             'party',
             'original_votes',
             'audited_votes',
             'diff'
             ]]

#Now same for senate
sen = copy(prsen[['town',
                 'precinct',
                 'TOTAL BALLOTS CAST.3',
                 'EDWARD J. MARKEY Original',
                 "KEVIN J. O'CONNOR Original",
                 'SHIVA AYYADURAI (WRITE-IN) Original',
                 'ALL OTHERS Original.1',
                 'BLANKS Original.1',
                 'OVERVOTES Original.1',
                 'TOTAL BALLOTS CAST.4',
                 'EDWARD J. MARKEY Audited',
                 "KEVIN J. O'CONNOR Audited",
                 'SHIVA AYYADURAI (WRITE-IN) Audited',
                 'ALL OTHERS Audited.1',
                 'BLANKS Audited.1',
                 'OVERVOTES Audited.1',
                 ]])

sen = sen.rename(columns={
    'ALL OTHERS Original.1': 'ALL OTHERS Original',
    'BLANKS Original.1': 'BLANKS Original',
    'OVERVOTES Original.1': 'OVERVOTES Original',
    'ALL OTHERS Audited.1': 'ALL OTHERS Audited',
    'BLANKS Audited.1': 'BLANKS Audited',
    'OVERVOTES Audited.1': 'OVERVOTES Audited',
    'TOTAL BALLOTS CAST.3': 'TOTAL BALLOTS CAST Original',
    'TOTAL BALLOTS CAST.4': 'TOTAL BALLOTS CAST Audited'
    })

#Split and melt by candidate
cands = ['TOTAL BALLOTS CAST',
         "EDWARD J. MARKEY",
         "KEVIN J. O'CONNOR",
         "SHIVA AYYADURAI (WRITE-IN)",
         'ALL OTHERS',
         'BLANKS',
         'OVERVOTES'
         ]
sen = pd.DataFrame()
for cand in cands:
    origVar = cand + ' Original'
    audVar = cand + ' Audited'
    currdf = copy(prsen[['town', 'precinct', origVar, audVar]])
    currdf['candidate'] = cand
    currdf = currdf.rename(columns={origVar: 'original_votes',
                                    audVar: 'audited_votes'
                          })
    sen = pd.concat([sen, currdf])

sen['state'] = 'MASSACHUSETTS'
sen['town'] = sen['town'].astype(str).str.upper()
sen['date'] = '2020-11-03'
sen['office'] = 'US SENATE'
sen['diff'] = sen.audited_votes - sen.original_votes

cand_reps = {
        'SHIVA AYYADURAI (WRITE-IN)': 'SHIVA AYYADURAI',
}
for cand in cand_reps.keys():
    sen.loc[sen.candidate == cand, 'candidate'] = cand_reps[cand]

sen['party'] = ''
party_map = {
        'EDWARD J. MARKEY': 'DEMOCRAT',
        "KEVIN J. O'CONNOR": 'REPUBLICAN'
}
for cand in party_map.keys():
    sen.loc[sen.candidate == cand, 'party'] = party_map[cand]

sen = sen[['state',
             'town',
             'date',
             'office',
             'precinct',
             'candidate',
             'party',
             'original_votes',
             'audited_votes',
             'diff'
             ]]

spclean = pd.concat([pres, sen])

#We will deal with total ballots cast separately
spclean = spclean.loc[spclean.candidate != 'TOTAL BALLOTS CAST']

spclean.to_csv(f'{SAVE_DIR}President and US Senate Recleaned.csv',
            index=False)


#On to Question 1
quest = pd.read_excel(f'{RAW_DIR}{RAW_NAME}', 'Question 1', dtype={'Wd.': str})

quest['precinct'] = quest['Wd.'].astype(str) + '-' + quest['Pct.'].astype(str)

quest = quest.rename(columns={
    'CITY/TOWN': 'town'
    })

quest.town = quest.town.astype(str).str.upper()

yes = copy(quest[['town', 'precinct', 'YES Original', 'YES Audited']])
yes = yes.rename(columns={'YES Original': 'original_votes',
                          'YES Audited': 'audited_votes'})
no = copy(quest[['town', 'precinct', 'NO Original', 'NO Audited']])
no = no.rename(columns={'NO Original': 'original_votes',
                        'NO Audited': 'audited_votes'})
tot = copy(quest[['town', 'precinct',
                  'TOTAL BALLOTS CAST Original',
                  'TOTAL BALLOTS CAST Audited']])
tot = tot.rename(columns={'TOTAL BALLOTS CAST Original': 'original_votes',
                          'TOTAL BALLOTS CAST Audited': 'audited_votes'})
over = copy(quest[['town', 'precinct',
                   'OVERVOTES',
                   'OVERVOTES Audited']])
over = over.rename(columns={'OVERVOTES': 'original_votes',
                            'OVERVOTES Audited': 'audited_votes'})
blank = copy(quest[['town', 'precinct',
                   'BLANKS',
                   'BLANKS Audited']])
blank = blank.rename(columns={'BLANKS': 'original_votes',
                              'BLANKS Audited': 'audited_votes'})

yes['candidate'] = 'YES'
no['candidate'] = 'NO'
tot['candidate'] = 'TOTAL BALLOTS CAST'
over['candidate'] = 'OVERVOTES'
blank['candidate'] = 'BLANKS'

quecln = pd.concat([yes, no, tot, over, blank])

quecln['state'] = 'MASSACHUSETTS'
quecln['date'] = '2020-11-03'
quecln['office'] = 'Question 1'

quecln['party'] = ''
quecln['diff'] = quecln['audited_votes'] - quecln['original_votes']

quecln = quecln[['state',
                 'town',
                 'date',
                 'office',
                 'precinct',
                 'candidate',
                 'party',
                 'original_votes',
                 'audited_votes',
                 'diff'
                 ]]

quecln.to_csv(f'{SAVE_DIR}Question 1_MA.csv', index=False)


#State senate
stsen = pd.read_excel(f'{RAW_DIR}{RAW_NAME}',
                      'State Senator',
                      dtype = {'Wd.': str})

stsen = stsen.rename(columns={
    'DISTRICT': 'district',
    'CITY/TOWN': 'town',
    'CANDIDATES': 'candidate',
    'ELECTION NIGHT': 'original_votes',
    'AUDIT': 'audited_votes',
    'DIFFERENCE': 'diff'
    })

stsen = stsen.fillna(method='ffill')

stsen['precinct'] = stsen['Wd.'].astype(str) + \
                 '-' + \
                 stsen['Pct.'].astype(str)
stsen = stsen.drop(columns=['Wd.','Pct.','OFFICE'], axis=1)

stsen.candidate = stsen.candidate.astype(str).str.upper()
stsen.town = stsen.town.astype(str).str.upper()
stsen.district = stsen.district.astype(str).str.upper()

stsen['date'] = '2020-11-03'
stsen['office'] = 'STATE SENATE'
stsen['state'] = 'MASSACHUSETTS'

stsen = stsen[['state',
                 'town',
                 'district',
                 'date',
                 'office',
                 'precinct',
                 'candidate',
                 'original_votes',
                 'audited_votes',
                 'diff'
                 ]]

stsen.to_csv(f'{SAVE_DIR}State Senator_MA.csv', index=False)


#State House
sthse = pd.read_excel(f'{RAW_DIR}{RAW_NAME}',
                      'State Rep',
                      dtype = {'Wd.': str})

sthse = sthse.rename(columns={
    'DISTRICT': 'district',
    'CITY/TOWN': 'town',
    'CANDIDATES': 'candidate',
    'ELECTION NIGHT': 'original_votes',
    'AUDIT': 'audited_votes',
    'DIFFERENCE': 'diff'
    })

sthse = sthse.fillna(method='ffill')

sthse['precinct'] = sthse['Wd.'].astype(str) + \
                 '-' + \
                 sthse['Pct.'].astype(str)
sthse = sthse.drop(columns=['Wd.','Pct.','OFFICE'], axis=1)

sthse.candidate = sthse.candidate.astype(str).str.upper()
sthse.town = sthse.town.astype(str).str.upper()
sthse.district = sthse.district.astype(str).str.upper()

sthse['date'] = '2020-11-03'
sthse['office'] = 'STATE REPRESENTATIVE'
sthse['state'] = 'MASSACHUSETTS'

sthse = sthse[['state',
                 'town',
                 'district',
                 'date',
                 'office',
                 'precinct',
                 'candidate',
                 'original_votes',
                 'audited_votes',
                 'diff'
                 ]]

sthse.to_csv(f'{SAVE_DIR}State Rep_MA.csv', index=False)
